#!/usr/bin/env bash
set -o errexit

# ANSI color escape sequences, kept as literal backslash text; they are turned
# into real escape characters when printf meets them inside its format string.
NC="\033[0m"          # reset to the terminal's default color
RED="\033[0;31m"
GREEN="\033[0;32m"


# Print the help menu and stop when the script is called with no arguments or
# with an explicit help flag as the first argument.
# "$1" is quoted inside [[ ]] so an empty or whitespace-containing first
# argument is compared as a plain string instead of breaking the test.
if [[ "$#" -eq 0 || "$1" == "-h" || "$1" == "help" || "$1" == "--help" ]]; then
    printf "\n  This script uses taxonkit in a standard fashion to get lineage info from NCBI taxids.\n"
    printf "  It expects a single column file of taxids with no header, return table in the same order.\n"
    printf "  Thanks go to taxonkit, don't forget to cite that if using: https://bioinf.shenwei.me/taxonkit/.\n"
    printf "  For version info, run \`bit-version\`.\n\n"
    printf "    Usage:\n\t bit-get-lineage-from-taxids -i taxids.txt -o lineages.tsv\n\n"
    # Asking for help is not an error.
    exit 0
fi

## parsing arguments
# -i <file>  input file of taxids   (stored in taxids_file)
# -o <file>  output table to write  (stored in output_file)
# The leading ':' in the optstring puts getopts in silent mode: an unknown
# option arrives as '?' and an option missing its value arrives as ':', in
# both cases with the offending option letter in OPTARG.
# Both problems are reported on stderr and end the script with a non-zero status.
while getopts :i:o: args
do
    case "${args}"
    in
        i) taxids_file=${OPTARG};;
        o) output_file=${OPTARG};;
        :) printf "\n  ${RED}Argument -%s requires a value.${NC}\n\n    Run 'bit-get-lineage-from-taxids' with no arguments or '-h' only to see help menu.\n\n" "${OPTARG}" >&2
           exit 1;;
        \?) printf "\n  ${RED}Invalid argument: -%s${NC}\n\n    Run 'bit-get-lineage-from-taxids' with no arguments or '-h' only to see help menu.\n\n" "${OPTARG}" >&2
            exit 1;;
    esac
done

## checking variables are good
# Checks, in order: an input file was given, it exists as a regular file, and
# an output file was given. Each failure is reported on stderr and ends the
# script with a non-zero status so callers can detect it.
# Variables are quoted so paths containing spaces are tested as one word, and
# the file name is passed to printf as data rather than as part of the format.
if [[ -z "${taxids_file:-}" ]]; then
    printf "\n  Please specify an input taxid file to the '-i' argument.\n" >&2
    printf "\nExiting for now.\n\n" >&2
    exit 1
fi


if [[ ! -f "${taxids_file}" ]]; then
    printf "\n  The specified input file, %s, doesn't seem to be where we think it is :( \n" "${taxids_file}" >&2
    printf "\nExiting for now.\n\n" >&2
    exit 1
fi

if [[ -z "${output_file:-}" ]]; then
    printf "\n  Please specify an output file to the '-o' argument.\n" >&2
    printf "\nExiting for now.\n\n" >&2
    exit 1
fi

### checking that ncbi tax data is present already, and downloading if it isn't
helper-bit-get-ncbi-tax-data

### building the lineage table
# Without pipefail the pipeline's status is that of the final `tr`, which would
# hide a failing taxonkit stage and produce a header-only table.
set -o pipefail

# Stage the results in a private temp file instead of a fixed "lineages.tmp" in
# the working directory: no clobbering of an existing file of that name, no
# collisions between concurrent runs, and the EXIT trap removes it on any exit.
# Staging also means the output file is only written after the input has been
# fully read.
tmp_lineages=$(mktemp "${TMPDIR:-/tmp}/bit-lineages.XXXXXX") || exit 1
trap 'rm -f -- "${tmp_lineages}"' EXIT

# Keep fields 1 and 3 of the reformatted taxonkit output (presumably the taxid
# and the ';'-separated lineage - confirm against taxonkit docs), then turn
# each ';' into a tab so every rank gets its own column.
if ! taxonkit lineage < "${taxids_file}" \
        | taxonkit reformat -r NA \
        | cut -f 1,3 \
        | tr ";" "\t" > "${tmp_lineages}"; then
    printf "\n  Generating lineages with taxonkit failed, so no output table was written.\n" >&2
    printf "\nExiting for now.\n\n" >&2
    exit 1
fi

# Header row followed by the lineage rows.
{
    printf "taxid\tdomain\tphylum\tclass\torder\tfamily\tgenus\tspecies\n"
    cat -- "${tmp_lineages}"
} > "${output_file}"
